1 Mammal Metabolic Rates

1.1 Data

Predictors: body mass interacting with habitat, effect of >10 kg.

1.1.1 Read in data

mammal_bmr <- read.csv("data/bmr.csv") 

1.1.2 Cleaning

Rename some variables and create species name from genus and species.

mammal_bmr <- mammal_bmr %>%
  rename(source = Source,
         order = order.corrected,
        # n.animals = Number.of.Animals,
         mass.kg = body.mass..kg.) %>%
  mutate(order = str_to_title(order),
         genus = str_to_title(genus),
         animal = paste(genus, species),
         above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))

Keep only variables we will be using. And “factor” “chr” variables.

mammal_bmr <- mammal_bmr %>%
  dplyr::select(order,
                genus,
                species, 
                common.name,
                mass.kg,
                log10.body.mass,
                log10.bmr,
                diet,
                habitat,
                source,
                animal,
                above.10kg
         ) %>%
  mutate(across(where(is.character), factor)) %>%
  arrange(order, genus, species)

1.1.3 Data Visualization

A few quick graphs just to make sure the data are looking as we expect (error checking).

my_scatter_plot <- gf_point(log10.bmr ~ log10.body.mass | habitat,
         color = ~order,
         # size = ~parse_number(n_animals),
         data = mammal_bmr,
         alpha = 0.5) %>%
  gf_theme(legend.position = 'bottom',
           legend.title = element_text(size = 8),
           legend.text = element_text(size = 6)) %>%
  gf_theme(scale_color_viridis_d('Order')) %>%
  gf_labs(x = 'Log10(Mass (kg))',
          y = 'Log10(BMR (kcal/day))') 
my_scatter_plot

plotly::ggplotly(my_scatter_plot) %>%
  plotly::layout(legend = list(#orientation = 'h',
                               font = list(size = 6)))

1.2 GLS

Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat, effect of >10 kg.

lm_model <- lm(log10.bmr ~ log10.body.mass * habitat + 
                 log10.body.mass*above.10kg, # + diet,
                 data = mammal_bmr)
tab_model(lm_model) 
  log 10.bmr
Predictors Estimates CI p
(Intercept) 1.88 1.69 – 2.07 <0.001
log10.body.mass 0.77 0.68 – 0.86 <0.001
habitat [land] -0.19 -0.37 – -0.01 0.041
above.10kg [Smaller] 0.04 -0.10 – 0.19 0.547
log10.body.mass * habitat
[land]
0.04 -0.06 – 0.14 0.470
log10.body.mass *
above.10kg [Smaller]
-0.12 -0.21 – -0.03 0.006
Observations 722
R2 / R2 adjusted 0.961 / 0.961
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
Anova Table (Type II tests)
  Sum Sq Df F value Pr(>F)
log10.body.mass 231.2 1 8907 0
habitat 0.2168 1 8.355 0.003963
above.10kg 0.5105 1 19.67 1.066e-05
log10.body.mass:habitat 0.01359 1 0.5235 0.4696
log10.body.mass:above.10kg 0.1961 1 7.556 0.006131
Residuals 18.58 716 NA NA

For more on formatting the fitted model table see: https://strengejacke.github.io/sjPlot/articles/tab_model_estimates.html

1.2.1 Model Assessment

lm_preds <- predict(lm_model, se.fit = TRUE)

mammal_bmr <- mammal_bmr  %>%
  mutate(lm_resids = resid(lm_model),
         lm_fitted = lm_preds$fit,
         lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
         lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_bmr)

acf(resid(lm_model))

gf_dhistogram(~lm_resids, data = mammal_bmr,
              bins = 20) %>%
  gf_fitdistr()

1.2.2 Model Predictions

Any predictors not shown in a plot are held constant at their mean or most common value.

gf_line(10^lm_fitted ~ 10^log10.body.mass,
         color = ~habitat,
         data = mammal_bmr) %>%
  gf_ribbon(10^lm_fit_lo + 10^lm_fit_hi ~ 10^log10.body.mass,
            color = ~habitat, fill = ~habitat) %>%
  # gf_facet_grid(~ diet) %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors)) %>%
  gf_refine(scale_x_continuous(trans = 'log10'),
            scale_y_continuous(trans = 'log10'))

gf_point(log10.bmr ~ lm_fitted, data = mammal_bmr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(BMR)',
          x = 'Model-predicted log10(BMR)',
          title = 'Linear Regresssion (no phylogeny)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

Can refine the plot of predictions later on.

1.3 Mixed-effects model

Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.

# this code may generate warnings that are harmless
# https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse
re_model <- glmmTMB(log10.bmr ~ log10.body.mass * habitat +
                      log10.body.mass*above.10kg + # diet +
                      (1 | order/genus/species),
                 data = mammal_bmr)

tab_model(re_model) 
  log 10.bmr
Predictors Estimates CI p
(Intercept) 1.85 1.67 – 2.03 <0.001
log10.body.mass 0.74 0.65 – 0.82 <0.001
habitat [land] -0.25 -0.40 – -0.09 0.002
above.10kg [Smaller] 0.06 -0.07 – 0.19 0.346
log10.body.mass * habitat
[land]
0.05 -0.04 – 0.14 0.272
log10.body.mass *
above.10kg [Smaller]
-0.10 -0.19 – -0.02 0.020
Random Effects
σ2 0.00
τ00 species:(genus:order) 0.01
τ00 genus:order 0.01
τ00 order 0.02
ICC 0.97
N species 626
N genus 415
N order 24
Observations 722
Marginal R2 / Conditional R2 0.948 / 0.998
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
Analysis of Deviance Table (Type II Wald chisquare tests)
  Chisq Df Pr(>Chisq)
log10.body.mass 5370 1 0
habitat 16.96 1 3.809e-05
above.10kg 7.427 1 0.006426
log10.body.mass:habitat 1.206 1 0.2721
log10.body.mass:above.10kg 5.412 1 0.02

1.3.1 Model Assessment

re_ave_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = ~0)
re_ind_preds <- predict(re_model,
                        se.fit = TRUE,
                        re.form = NULL)
mammal_bmr <- mammal_bmr %>%
  mutate(re_resids = resid(re_model),
         re_ind_fitted = re_ind_preds$fit,
         re_ave_fitted = re_ave_preds$fit,
         re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
         re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)

# save fitted model and data
saveRDS(mammal_bmr, 'fitted-models/mr-data.RDS')
saveRDS(re_model, 'fitted-models/mr-re-model.RDS')


gf_point(re_resids ~ re_ind_fitted, data = mammal_bmr)

acf(resid(re_model))

gf_dhistogram(~re_resids, data = mammal_bmr) %>%
  gf_fitdistr()

1.3.2 Predictions from Model

mammal_bmr <- mammal_bmr %>%
  mutate(pretty_diet = case_when(diet == 'c'~ 'Carnivore',
                                 diet == 'h' ~ 'Herbivore',
                                 diet == 'o' ~ 'Omnivore'),
         pretty_diet = factor(pretty_diet, levels = c('Herbivore', 'Omnivore', 'Carnivore')),
         Habitat = stringr::str_to_sentence(habitat))

re_preds <- gf_point(10^log10.bmr ~ 10^log10.body.mass,
         color = ~Habitat, data = mammal_bmr,
         size = 0.5, alpha = 0.4,
         text = ~animal) %>%
gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
         color = ~Habitat,
         data = mammal_bmr,
        text = ~Habitat,
        alpha = 1) %>%
  gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
            color = ~Habitat, fill = ~Habitat,
            text = ~Habitat,
            alpha = 0.4) %>%
  # gf_facet_grid(~ pretty_diet) %>%
  gf_labs(x = 'Body Mass (kg)', y = 'BMR (kcal/day)') %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors)) %>%
  gf_refine(scale_x_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE)),
            scale_y_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE))
            ) %>%
  gf_theme(plot.margin = unit(c(1,1,1,1), "cm"))
  # Kleiber
  # BMR (Kleiber, 1947): BMR (kcal/day) = 70 * body mass (kg) ^0.75
pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>%
  mutate(kleiber_bmr = 70*mass^0.75)

re_preds <- re_preds %>%
  gf_line(kleiber_bmr ~ mass, data = pub_models, color = 'black', 
          text = ~"Kleiber") %>% 
  # c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
  gf_theme(legend.position = c(0.9, 0.1),
           legend.title = element_blank()) %>%
  gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))

re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')

Figure 1.1: Observed and predicted BMR as a function of mass. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, and black line is based on Kleiber (1947).

re_preds
gf_point(log10.bmr ~ re_ind_fitted, data = mammal_bmr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(BMR)',
          x = 'Model-predicted, Species-specific log10(BMR)',
          title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

The graph above is a bit “cheating” as we have a random effect of species, but there are only 1-2 measurements for most of the species (nearly guaranteeing that our estimates will be nearly perfect). What if we also check out the predictions accounting for the modeled effects of Order and Genus, but predicting for the “average” species in each Genus?

no_species <- mammal_bmr %>%
  mutate(species = NA)
re_genus_level_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = NULL,
                    newdata = no_species)

gf_point(log10.bmr ~ re_genus_level_preds$fit, data = mammal_bmr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(BMR)',
          x = 'Model-predicted, Genus-specific log10(BMR)',
          title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

1.3.3 Alternate version: Mass Weightings

Weight observations by body mass, binned into log mass bins. The goal is to reduce the undue influence of many data points collected from very small animals.

nr <- nrow(mammal_bmr)
mammal_bmr <- mammal_bmr %>%
  mutate(log.mass.bin = cut_width(log10.body.mass,
                                  width = 1,
                                  boundary = log10(0.001))) %>%
  group_by(log.mass.bin) %>%
  mutate(log.mass.weights = 1 / n())

mammal_bmr <- mammal_bmr %>%
  # make weights sum to nrow(mammal_bmr)
  mutate(log.mass.weights = log.mass.weights / sum(mammal_bmr$log.mass.weights) * nrow(mammal_bmr)) %>%
  ungroup()
# this code may generate warnings that are harmless
# https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse
wt_re_model <- glmmTMB(log10.bmr ~ log10.body.mass * habitat +
                      log10.body.mass*above.10kg + # diet +
                      (1 | order/genus/species),
                    weights = log.mass.weights,
                 data = mammal_bmr)

tab_model(wt_re_model) 
  log 10.bmr
Predictors Estimates CI p
(Intercept) 1.85 1.68 – 2.02 <0.001
log10.body.mass 0.74 0.66 – 0.82 <0.001
habitat [land] -0.25 -0.40 – -0.09 0.002
above.10kg [Smaller] 0.06 -0.06 – 0.19 0.335
log10.body.mass * habitat
[land]
0.05 -0.04 – 0.14 0.261
log10.body.mass *
above.10kg [Smaller]
-0.10 -0.18 – -0.02 0.019
Random Effects
σ2 0.00
τ00 species:(genus:order) 0.01
τ00 genus:order 0.01
τ00 order 0.02
ICC 0.99
N species 626
N genus 415
N order 24
Observations 722
Marginal R2 / Conditional R2 0.949 / 0.999
wt_re_anova_results <- car::Anova(wt_re_model)
pander(wt_re_anova_results)
Analysis of Deviance Table (Type II Wald chisquare tests)
  Chisq Df Pr(>Chisq)
log10.body.mass 5405 1 0
habitat 17.61 1 2.712e-05
above.10kg 7.242 1 0.00712
log10.body.mass:habitat 1.264 1 0.2608
log10.body.mass:above.10kg 5.482 1 0.01922

1.3.4 Alternate version: Massive only

Only include species with body mass of 10 kg or more.

big_mammal_bmr <- mammal_bmr %>%
  filter(log10.body.mass >= log10(10)) %>%
  mutate(across(where(is.factor), factor)) 
# this code may generate warnings that are harmless
# https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse
big_re_model <- glmmTMB(log10.bmr ~ log10.body.mass * habitat + # diet +
                      (1 | order/genus/species),
                      data = big_mammal_bmr)

tab_model(big_re_model) 
  log 10.bmr
Predictors Estimates CI p
(Intercept) 1.92 1.67 – 2.16 <0.001
log10.body.mass 0.68 0.58 – 0.78 <0.001
habitat [land] -0.41 -0.65 – -0.17 0.001
log10.body.mass * habitat
[land]
0.13 0.01 – 0.26 0.037
Random Effects
σ2 0.01
τ00 species:(genus:order) 0.00
τ00 genus:order 0.01
τ00 order 0.04
ICC 0.82
N species 58
N genus 52
N order 12
Observations 60
Marginal R2 / Conditional R2 0.820 / 0.968
big_re_anova_results <- car::Anova(big_re_model)
pander(big_re_anova_results)
Analysis of Deviance Table (Type II Wald chisquare tests)
  Chisq Df Pr(>Chisq)
log10.body.mass 417.5 1 8.542e-93
habitat 14.49 1 0.0001409
log10.body.mass:habitat 4.372 1 0.03654

1.4 PGLS

1.4.1 Read in Tree Data

#Read in trees from Upham et al

tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)

all_trees <- list()

for (i in 1:length(tree_files)){
  all_trees[[i]] <- read.tree(paste0(tree_path, '/',
                                     tree_files[i]))
  if (i == 1){
    treeset <- all_trees[[i]]
  }else{
    treeset <- c(treeset, all_trees[[i]])
  }
}

all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
                             function(x) 
                               stringr::str_replace_all(x, pattern = '_',
                                                      replacement = ' '))

# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
  if (t == 1){
    tip_labs <- all_tip_labels[[t]]
  }else{
    tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
  }
}

                            
# keep only the species in mammal_bmr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_bmr %>%
  mutate(animal = as.character(animal)) %>%
  filter(animal %in% tip_labs) %>%
  droplevels()


taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')

Fit models, one model for every tree in our list.

pgls_models <- list()

for (t in c(1:length(treeset))){
  # make sure there is only one row of data per species (why sample and not average -- seems dubious??)
  pgls_rep_data <- pgls_data %>% 
    group_by(animal) %>%
    sample_n(1) %>%
    ungroup
  
  
  #Reduce the tree to only include those species in the data set
  refit_tree <- treeset[[t]]
  refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
  refit_tree <- drop.tip(refit_tree, 
                         setdiff(refit_tree$tip.label, 
                                 unique(pgls_rep_data %>% pull(animal))))
  
  #Order the data set so that it is in the same order as the tip labels of the tree
  pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
                             pgls_rep_data,
                             by = c('tree.tip.label' = 'animal'),
                             keep = TRUE)
  
  # fit the model
 pgls_models[[t]] <- tryCatch({
    fittd <- gls(log10.bmr ~ log10.body.mass * habitat + 
                     log10.body.mass * above.10kg, # diet ,
                   correlation = corPagel(value = 0.8, 
                                          phy = refit_tree, 
                                          fixed = FALSE, 
                                          form = ~animal),
                   data = pgls_rep_data)
    },
  error = function(cond){
    message(paste('PGLS fit failed for tree', t))
    return(NULL)
  }
  )
}
pgls_models <- pgls_models[which(lapply(pgls_models, is.null) == FALSE)]

Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 0.

Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.

# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)  
# # pool summarise the models using Rubin's rule corrected for small samples
pooled_pgls   <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)

pander(pooled_pgls_summ %>% select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
Table continues below
term estimate std.error 2.5 %
(Intercept) 1.689 0.1618 1.371
log10.body.mass 0.7731 0.03877 0.697
habitatland -0.1617 0.07006 -0.2992
above.10kgSmaller 0.05392 0.05733 -0.05863
log10.body.mass:habitatland 0.009598 0.04303 -0.07489
log10.body.mass:above.10kgSmaller -0.07757 0.03826 -0.1527
97.5 % lambda fmi
2.007 0.002002 0.004827
0.8492 0.001221 0.004046
-0.02413 0.002051 0.004876
0.1665 0.002913 0.005738
0.09408 0.001059 0.003884
-0.002445 0.001043 0.003868
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
Analysis of Deviance Table (Type II tests) (continued below)
  F num df den df missing info
log10.body.mass 5087 1 8273024 0.003339
habitat 10.73 1 10870364 0.002912
above.10kg 3.936 1 1855658 0.00705
log10.body.mass:habitat 0.04975 1 82138092 0.001059
log10.body.mass:above.10kg 4.11 1 84808649 0.001043
  riv Pr(>F)
log10.body.mass 0.00335 0
habitat 0.002921 0.001054
above.10kg 0.0071 0.04726
log10.body.mass:habitat 0.001061 0.8235
log10.body.mass:above.10kg 0.001044 0.04264

Alternative approach: using MuMIn to do model averaging. This will weight models according to their information criteria scores instead of weighting them all equally. But we see here the results are nearly identical.

pgls_avg <- model.avg(pgls_models, 
                      rank = function(x) 1)

This gives a model with the same coefficients (same model) that I think we can better make predictions from. The other way (with pool()) is better for getting the ANOVA results we wanted. Two ways of getting the same result, in different packages.

For the PGLS model, we also want to extract the estimate of \(\Lambda\), which tells us about how the phylogeny is affecting the correlation structure.

Previous approach was to take the mean of the estimates of \(\Lambda\) from all the individual fitted models.

lambda <- mean(unlist(purrr::map(pgls_models, 
                                 function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.8900021

According to this simple method our estimate is \(\hat{\Lambda} =\) 0.89.

1.4.2 Model Assessment

It’s not really clear how to even approach this, since we don’t really any longer expect the residuals to be normal or independent. And based on the data we know there’s not a huge issue with linearity. So…ok?

1.4.3 Prediction Plots

There’s a problem here in that the “pooled” model object does NOT have any information stored anymore about the variance-covariance matrix, which is key for making predictions from the model. So for making predictions we need to use package MuMIn and its model.avg() function, just weighting equally all the models from all the trees. This results in a fitted single model object from which prediction is possible with predict(). From initial inspection the coefficient estimates of the averaged model are the same as the previous version averaged with the mice package.

pgls_preds <- predict(pgls_avg, 
                    se.fit = TRUE,
                    newdata = mammal_bmr)

mammal_bmr <- mammal_bmr %>%
  mutate(pgls_fitted = pgls_preds$fit,
         pgls_lo = pgls_preds$fit - 1.96*pgls_preds$se.fit,
         pgls_hi = pgls_preds$fit + 1.96*pgls_preds$se.fit
         )

gf_line(pgls_fitted ~ log10.body.mass,
         color = ~habitat,
         data = mammal_bmr) %>%
  gf_ribbon(pgls_lo + pgls_hi ~ log10.body.mass,
            color = ~habitat, fill = ~habitat) %>%
  # gf_facet_grid(~ diet) %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors)) 

gf_point(log10.bmr ~ pgls_fitted,
         data = mammal_bmr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(BMR)',
          x = 'Model-predicted log10(BMR)',
          title = 'PGLS Model') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

Notice that the CIs are wider here than in the original linear regression model. This is because of appropriate incorporation of the fact that the observations are not independent and there’s phylogenetic signal in the data.

As far as I know though, there is not a way to make predictions for specific species or taxa that are more accurate and account for the “way” in which the species/taxon tends to deviate from the overall average. Instead, our overall uncertainty is adjusted to account for the dependence that we know is in the data. But we can’t (I don’t think) decompose it into a random part and a phylogeny part like we can with the random effects; they are both mixed inextricably together.

1.5 Cross-validation

We could consider grouping by by body-mass ranges or phylogeny and then assess predictive accuracy as the measure of “success”

This remains to do.

1.6 What’s next?

Want to repeat a similar analysis with other datasets on other response variables of interest in a “physiological hierarchy”:

  • Stroke volume (sv)
  • Heart rate (hr)
  • Breathing Freq (bf)
  • Tidal volume (tv)

All these are other candidate response variables. We started with metabolism, and want to move on to these other metrics.

Main predictor of interest is habitat, controlling for body size and phylogeny: Are aquatic/marine animals different in some notable way?

Data sets for these other variables are smaller (about n = 50 species).

2 Mammal Breathing Frequency

2.1 Data

Predictors: body mass interacting with habitat.

2.1.1 Read in data

mammal_fr <- read.csv("data/fr.csv") 

2.1.2 Notes

  • log10fr is the base-10 logarithm of breathing frequency (units not known?)

2.1.3 Cleaning

Rename some variables and create species name from genus and species.

mammal_fr <- mammal_fr %>%
  rename(source = Source,
         order = order.corrected,
        # n.animals = Number.of.Animals,
         mass.kg = body.mass..kg.) %>%
  mutate(order = str_to_title(order),
         genus = str_to_title(genus),
         animal = paste(genus, species),
         above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))

Keep only variables we will be using. And factor() “chr” variables.

mammal_fr <- mammal_fr %>%
  dplyr::select(order,
                genus,
                species, 
                common.name,
                mass.kg,
                log10.body.mass,
                log10fr,
                habitat,
                source,
                animal,
                above.10kg
         ) %>%
  mutate(across(where(is.character), factor)) %>%
  arrange(order, genus, species)

2.1.4 Viz

A few quick graphs just to make sure the data are looking as we expect (error checking).

my_scatter_plot <- gf_point(log10fr ~ log10.body.mass | habitat,
         color = ~order,
         # size = ~parse_number(n_animals),
         data = mammal_fr,
         alpha = 0.5) %>%
  gf_theme(legend.position = 'bottom',
           legend.title = element_text(size = 8),
           legend.text = element_text(size = 6)) %>%
  gf_theme(scale_color_viridis_d('Order')) %>%
  gf_labs(x = 'Log10(Mass (kg))',
          y = 'Log10(fR (breaths/min))') 
my_scatter_plot

plotly::ggplotly(my_scatter_plot) %>%
  plotly::layout(legend = list(#orientation = 'h',
                               font = list(size = 6)))

Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.

2.2 GLS

Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat. Weight by number of individuals (if have data later, don’t have it now)?

lm_model <- lm(log10fr ~ log10.body.mass * habitat,
                 data = mammal_fr)
summary(lm_model)
## 
## Call:
## lm(formula = log10fr ~ log10.body.mass * habitat, data = mammal_fr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0346 -0.1332  0.0048  0.1628  0.7500 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  1.52694    0.12823  11.908  < 2e-16 ***
## log10.body.mass             -0.40700    0.04817  -8.450 2.94e-13 ***
## habitatland                  0.21030    0.13763   1.528 0.129768    
## log10.body.mass:habitatland  0.19517    0.05445   3.584 0.000531 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2659 on 97 degrees of freedom
## Multiple R-squared:  0.7982, Adjusted R-squared:  0.792 
## F-statistic: 127.9 on 3 and 97 DF,  p-value: < 2.2e-16
tab_model(lm_model) 
  log 10 fr
Predictors Estimates CI p
(Intercept) 1.53 1.27 – 1.78 <0.001
log10.body.mass -0.41 -0.50 – -0.31 <0.001
habitat [land] 0.21 -0.06 – 0.48 0.130
log10.body.mass * habitat
[land]
0.20 0.09 – 0.30 0.001
Observations 101
R2 / R2 adjusted 0.798 / 0.792
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
Anova Table (Type II tests)
  Sum Sq Df F value Pr(>F)
log10.body.mass 9.063 1 128.1 1.956e-19
habitat 8.024 1 113.4 5.293e-18
log10.body.mass:habitat 0.9087 1 12.85 0.0005309
Residuals 6.861 97 NA NA

2.2.1 Model Assessment

lm_preds <- predict(lm_model, se.fit = TRUE)

mammal_fr <- mammal_fr  %>% mutate(lm_resids = resid(lm_model),
         lm_fitted = lm_preds$fit,
         lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
         lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_fr)

s245::gf_acf(~lm_model)

gf_dhistogram(~lm_resids, data = mammal_fr,
              bins = 20) %>%
  gf_fitdistr()

2.2.2 Model Predictions

Any predictors not shown in a plot are held constant at their mean or most common value.

gf_line(lm_fitted ~ log10.body.mass,
         color = ~habitat,
         data = mammal_fr) %>%
  gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
            color = ~habitat, fill = ~habitat) %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors))

gf_point(log10fr ~ lm_fitted, data = mammal_fr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fR)',
          x = 'Model-predicted log10(fR)',
          title = 'Linear Regresssion (no phylogeny)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

2.3 Mixed-effects model

Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.

re_model <- glmmTMB(log10fr ~ log10.body.mass * habitat + (1 | order/genus/species),
                 data = mammal_fr)
summary(re_model)
##  Family: gaussian  ( identity )
## Formula:          
## log10fr ~ log10.body.mass * habitat + (1 | order/genus/species)
## Data: mammal_fr
## 
##      AIC      BIC   logLik deviance df.resid 
##     10.3     31.2      2.8     -5.7       93 
## 
## Random effects:
## 
## Conditional model:
##  Groups                Name        Variance Std.Dev.
##  species:(genus:order) (Intercept) 0.01268  0.1126  
##  genus:order           (Intercept) 0.01781  0.1334  
##  order                 (Intercept) 0.05066  0.2251  
##  Residual                          0.01752  0.1324  
## Number of obs: 101, groups:  
## species:(genus:order), 101; genus:order, 85; order, 10
## 
## Dispersion estimate for gaussian family (sigma^2): 0.0175 
## 
## Conditional model:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  1.53600    0.14947  10.276  < 2e-16 ***
## log10.body.mass             -0.42091    0.04738  -8.884  < 2e-16 ***
## habitatland                  0.05191    0.13167   0.394    0.693    
## log10.body.mass:habitatland  0.23521    0.05120   4.594 4.36e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model) 
  log 10 fr
Predictors Estimates CI p
(Intercept) 1.54 1.24 – 1.83 <0.001
log10.body.mass -0.42 -0.51 – -0.33 <0.001
habitat [land] 0.05 -0.21 – 0.31 0.693
log10.body.mass * habitat
[land]
0.24 0.13 – 0.34 <0.001
Random Effects
σ2 0.02
τ00 species:(genus:order) 0.01
τ00 genus:order 0.02
τ00 order 0.05
ICC 0.80
N species 98
N genus 85
N order 10
Observations 101
Marginal R2 / Conditional R2 0.730 / 0.945
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
Analysis of Deviance Table (Type II Wald chisquare tests)
  Chisq Df Pr(>Chisq)
log10.body.mass 70.12 1 5.581e-17
habitat 88.75 1 4.474e-21
log10.body.mass:habitat 21.1 1 4.355e-06

2.3.1 Model Assessment

re_ave_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = ~0)
re_ind_preds <- predict(re_model,
                        se.fit = TRUE,
                        re.form = NULL)
mammal_fr <- mammal_fr %>%
  mutate(re_resids = resid(re_model),
         re_ind_fitted = re_ind_preds$fit,
         re_ave_fitted = re_ave_preds$fit,
         re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
         re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)

# save fitted model and data
saveRDS(mammal_fr, 'fitted-models/fr-data.RDS')
saveRDS(re_model, 'fitted-models/fr-re-model.RDS')


gf_point(re_resids ~ re_ind_fitted, data = mammal_fr)

acf(resid(re_model))

#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_fr) %>%
  gf_fitdistr()

2.3.2 Predictions from Model

mammal_fr <- mammal_fr %>%
  mutate(Habitat = stringr::str_to_sentence(habitat))

re_preds <- gf_point(10^log10fr ~ 10^log10.body.mass,
         color = ~Habitat,
        text = ~animal,
         data = mammal_fr) %>% 
  gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
         color = ~Habitat,
         data = mammal_fr,
         text = ~Habitat) %>%
  gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
            color = ~Habitat, fill = ~Habitat, 
            text = ~Habitat) %>% 
  gf_labs(x = 'Body Mass (kg)', y = 'Breathing frequency\n(breaths/minute)') %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors)) %>%
  gf_refine(scale_x_continuous(trans = 'log10',
                              labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE)),
            scale_y_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE)))

# For breathing frequency, terrestrial (Stahl 1967): fR (breaths/minute) = 53.5 * body mass ^-0.26
# For breathing frequency, marine (Mortola, 2006): fR (breaths/minute) = 33 * body mass ^-0.42

pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>% # mass = seq(from = 0.015, by = 0.1, to = 42000)) %>%
  mutate(fr_stahl = 53.5*mass^-0.26,
         fr_mortola = 33*mass^-0.42)

re_preds <- re_preds %>%
  gf_line(fr_stahl ~ mass, data = pub_models, color = 'black',
          text = ~'Stahl') %>%
  gf_line(fr_mortola ~ mass, data = pub_models, color = 'black', linetype = 'dashed',
          text = ~'Mortola') %>%
  # c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
  gf_theme(legend.position = c(0.9, 0.9),
           legend.title = element_blank()) %>%
  gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')

Figure 2.1: Observed and predicted breathing frequency as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967) for terrestrial species, and dashed black line on Mortola (2006) for marine species.

gf_point(log10fr ~ re_ind_fitted, data = mammal_fr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fr)',
          x = 'Model-predicted, Species-specific log10(fr)',
          title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

The graph above is a bit “cheating” as we have a random effect of species, but there are only 1-2 measurements for most of the species (nearly guaranteeing that our estimates will be nearly perfect). What if we also check out the predictions accounting for the modeled effects of Order and Genus, but predicting for the “average” species in each Genus?

no_species <- mammal_fr %>%
  mutate(species = NA)

re_genus_level_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = NULL,
                    newdata = no_species)

gf_point(log10fr ~ re_genus_level_preds$fit, data = mammal_fr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fH)',
          x = 'Model-predicted, Genus-specific log10(fr)',
          title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

2.4 PGLS

2.4.1 Read in Tree Data

#Read in the trees from Upham et al
#Upham et al provide four sets of 10000 trees
# ??? There are 101 files of trees in the directory?
# original code sampled 4x10 trees from a set of 100
# here I am just using all 101 that are there, is that oK?

tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)

all_trees <- list()

for (i in 1:length(tree_files)){
  all_trees[[i]] <- read.tree(paste0(tree_path, '/',
                                     tree_files[i]))
  if (i == 1){
    treeset <- all_trees[[i]]
  }else{
    treeset <- c(treeset, all_trees[[i]])
  }
}

all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
                             function(x) 
                               stringr::str_replace_all(x, pattern = '_',
                                                      replacement = ' '))

# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
  if (t == 1){
    tip_labs <- all_tip_labels[[t]]
  }else{
    tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
  }
}

                            
# keep only the species in mammal_bmr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_fr %>%
  filter(animal %in% tip_labs) %>%
  droplevels()


taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')

Fit models, one model for every tree in our list.

pgls_models <- list()

for (t in c(1:length(treeset))){
  # make sure there is only one row of data per species (seems dubious??)
  pgls_rep_data <- pgls_data %>% 
    group_by(animal) %>%
    sample_n(1) %>%
    ungroup
  
  
  #Reduce the tree to only include those species in the data set
  refit_tree <- treeset[[t]]
  refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
  refit_tree <- drop.tip(refit_tree, 
                         setdiff(refit_tree$tip.label, 
                                 levels(pgls_rep_data %>% pull(animal))))
  
  #Order the data set so that it is in the same order as the tip labels of the tree
  pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
                             pgls_rep_data,
                             by = c('tree.tip.label' = 'animal'),
                             keep = TRUE)
  
  # fit the model
 pgls_models[[t]] <- tryCatch({
    fittd <- gls(log10fr ~ log10.body.mass * habitat,
                   correlation = corPagel(value = 0.8, 
                                          phy = refit_tree, 
                                          fixed = FALSE, 
                                          form = ~animal),
                   data = pgls_rep_data)
    },
  error = function(cond){
    message(paste('PGLS fit failed for tree', t))
    return(NULL)
  }
  )
}
pgls_models <- pgls_models[!sapply(pgls_models, is.null)]

Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 0.

Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.

# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)  
# # pool summarise the models using Rubin's rule corrected for small samples
#summary(pgls_mira)
pooled_pgls   <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)
#pooled_pgls_summ <- summary(pool(pgls_mira, type = 'all', conf.int = TRUE))

pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
Table continues below
term estimate std.error 2.5 % 97.5 %
(Intercept) 1.609 0.1827 1.247 1.972
log10.body.mass -0.415 0.04703 -0.5084 -0.3216
habitatland 0.1285 0.1379 -0.1453 0.4024
log10.body.mass:habitatland 0.1935 0.05439 0.08551 0.3015
lambda fmi
0.001209 0.0216
0.0006789 0.02107
0.001403 0.0218
0.0139 0.03429
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
Analysis of Deviance Table (Type II tests) (continued below)
  F num df den df missing info riv
log10.body.mass 145.8 1 42780 0.04647 0.04873
habitat 85.92 1 13315 0.08337 0.09095
log10.body.mass:habitat 12.66 1 477466 0.0139 0.0141
  Pr(>F)
log10.body.mass 1.644e-33
habitat 2.156e-20
log10.body.mass:habitat 0.0003741

Alternative approach: using MuMIn to do model averaging. This will not necessarily weight all of the models/trees equally.

pgls_avg <- model.avg(pgls_models, 
                      rank = function(x) 1)
lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.7097979

3 Mammal Heart Rates

3.1 Data

Predictors: body mass interacting with habitat.

3.1.1 Read in data

mammal_hr <- read.csv("data/hr.csv") 

3.1.2 Notes

  • log10hr is the base-10 logarithm of the heart rate in beats/min

3.1.3 Cleaning

Rename some variables and create species name from genus and species.

mammal_hr <- mammal_hr %>%
  rename(source = Source,
         order = order.corrected,
        # n.animals = Number.of.Animals,
         mass.kg = body.mass..kg.) %>%
  mutate(order = str_to_title(order),
         genus = str_to_title(genus),
         animal = paste(genus, species),
         above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))

Keep only variables we will be using. And “factor” “chr” variables.

mammal_hr <- mammal_hr %>%
  dplyr::select(order,
                genus,
                species, 
                common.name,
                mass.kg,
                log10.body.mass,
                log10hr,
                habitat,
                source,
                animal,
                above.10kg
         ) %>%
  mutate(across(where(is.character), factor)) %>%
  arrange(order, genus, species)

3.1.4 Viz

A few quick graphs just to make sure the data are looking as we expect (error checking).

my_scatter_plot <- gf_point(log10hr ~ log10.body.mass | habitat,
         color = ~order,
         # size = ~parse_number(n_animals),
         data = mammal_hr,
         alpha = 0.5) %>%
  gf_theme(legend.position = 'bottom',
           legend.title = element_text(size = 8),
           legend.text = element_text(size = 6)) %>%
  gf_theme(scale_color_viridis_d('Order')) %>%
  gf_labs(x = 'Log10(fH (beats/min))',
          y = 'Log10(BMR (kcal/day))') 
my_scatter_plot

plotly::ggplotly(my_scatter_plot) %>%
  plotly::layout(legend = list(#orientation = 'h',
                               font = list(size = 6)))

Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.

3.2 GLS

Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat. Weight by number of individuals (if have data later, don’t have it now)?

lm_model <- lm(log10hr ~ log10.body.mass * habitat,
                 data = mammal_hr)
summary(lm_model)
## 
## Call:
## lm(formula = log10hr ~ log10.body.mass * habitat, data = mammal_hr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35008 -0.09705 -0.00279  0.07544  0.46383 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  2.30719    0.04970  46.419   <2e-16 ***
## log10.body.mass             -0.19865    0.01931 -10.288   <2e-16 ***
## habitatland                  0.01562    0.05638   0.277    0.782    
## log10.body.mass:habitatland -0.01550    0.02415  -0.642    0.523    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1371 on 85 degrees of freedom
## Multiple R-squared:  0.8096, Adjusted R-squared:  0.8028 
## F-statistic: 120.4 on 3 and 85 DF,  p-value: < 2.2e-16
tab_model(lm_model) 
  log 10 hr
Predictors Estimates CI p
(Intercept) 2.31 2.21 – 2.41 <0.001
log10.body.mass -0.20 -0.24 – -0.16 <0.001
habitat [land] 0.02 -0.10 – 0.13 0.782
log10.body.mass * habitat
[land]
-0.02 -0.06 – 0.03 0.523
Observations 89
R2 / R2 adjusted 0.810 / 0.803
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
Anova Table (Type II tests)
  Sum Sq Df F value Pr(>F)
log10.body.mass 6.081 1 323.3 1.043e-30
habitat 0.003741 1 0.1989 0.6567
log10.body.mass:habitat 0.007742 1 0.4116 0.5229
Residuals 1.599 85 NA NA

3.2.1 Model Assessment

lm_preds <- predict(lm_model, se.fit = TRUE)

mammal_hr <- mammal_hr  %>% mutate(lm_resids = resid(lm_model),
         lm_fitted = lm_preds$fit,
         lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
         lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_hr)

s245::gf_acf(~lm_model)

gf_dhistogram(~lm_resids, data = mammal_hr,
              bins = 17) %>%
  gf_fitdistr()

3.2.2 Model Predictions

Any predictors not shown in a plot are held constant at their mean or most common value.

gf_line(lm_fitted ~ log10.body.mass,
         color = ~habitat,
         data = mammal_hr) %>%
  gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
            color = ~habitat, fill = ~habitat) %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors)) 

gf_point(log10hr ~ lm_fitted, data = mammal_hr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fH)',
          x = 'Model-predicted log10(fH)',
          title = 'Linear Regresssion (no phylogeny)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

Can refine the plot of predictions later on.

3.3 Mixed-effects model

Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.

re_model <- glmmTMB(log10hr ~ log10.body.mass * habitat + (1 | order/genus),
                 data = mammal_hr)
summary(re_model)
##  Family: gaussian  ( identity )
## Formula:          log10hr ~ log10.body.mass * habitat + (1 | order/genus)
## Data: mammal_hr
## 
##      AIC      BIC   logLik deviance df.resid 
##    -92.5    -75.1     53.2   -106.5       82 
## 
## Random effects:
## 
## Conditional model:
##  Groups      Name        Variance  Std.Dev. 
##  genus:order (Intercept) 5.435e-03 7.372e-02
##  order       (Intercept) 2.571e-12 1.604e-06
##  Residual                1.271e-02 1.128e-01
## Number of obs: 89, groups:  genus:order, 69; order, 10
## 
## Dispersion estimate for gaussian family (sigma^2): 0.0127 
## 
## Conditional model:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  2.30472    0.04946   46.59   <2e-16 ***
## log10.body.mass             -0.19728    0.01945  -10.14   <2e-16 ***
## habitatland                  0.01622    0.05535    0.29    0.769    
## log10.body.mass:habitatland -0.01504    0.02417   -0.62    0.534    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model) 
  log 10 hr
Predictors Estimates CI p
(Intercept) 2.30 2.21 – 2.40 <0.001
log10.body.mass -0.20 -0.24 – -0.16 <0.001
habitat [land] 0.02 -0.09 – 0.12 0.769
log10.body.mass * habitat
[land]
-0.02 -0.06 – 0.03 0.534
Random Effects
σ2 0.01
τ00 genus:order 0.01
τ00 order 0.00
N genus 69
N order 10
Observations 89
Marginal R2 / Conditional R2 0.857 / NA
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
Analysis of Deviance Table (Type II Wald chisquare tests)
  Chisq Df Pr(>Chisq)
log10.body.mass 298 1 8.87e-67
habitat 0.1295 1 0.7189
log10.body.mass:habitat 0.3871 1 0.5338

Note: for this model the random effect of species is excluded as we don’t have enough species with more than one observation to fit it well.

3.3.1 Model Assessment

re_ave_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = ~0)
re_ind_preds <- predict(re_model,
                        se.fit = TRUE,
                        re.form = NULL)
mammal_hr <- mammal_hr %>%
  mutate(re_resids = resid(re_model),
         re_ind_fitted = re_ind_preds$fit,
         re_ave_fitted = re_ave_preds$fit,
         re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
         re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)

# save fitted model and data
saveRDS(mammal_hr, 'fitted-models/hr-data.RDS')
saveRDS(re_model, 'fitted-models/hr-re-model.RDS')

gf_point(re_resids ~ re_ind_fitted, data = mammal_hr)

acf(resid(re_model))

#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_hr, bins = 15) %>%
  gf_fitdistr()

3.3.2 Predictions from Model

mammal_hr <- mammal_hr %>%
  mutate(Habitat = stringr::str_to_sentence(habitat))

re_preds <- gf_point(10^log10hr ~ 10^log10.body.mass,
         color = ~Habitat,
        text = ~animal,
         data = mammal_hr) %>% 
  gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
         color = ~Habitat,
         data = mammal_hr,
         text = ~Habitat) %>%
  gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
            color = ~Habitat, fill = ~Habitat, 
            text = ~Habitat) %>% 
  gf_labs(x = 'Body Mass (kg)', y = 'Heart Rate (beats/minute)') %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors)) %>%
  gf_refine(scale_x_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE)
                               ),
            scale_y_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE))
            )

# For heart rate (Stahl 1967): fH (beats/minute)= 241 * body mass (kg) ^-0.25
pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>% # data.frame(mass = seq(from = 0.009, by = 0.1, to = 70000)) %>%
  mutate(hr_stahl = 241*mass^-0.25)

re_preds <- re_preds %>%
  gf_line(hr_stahl ~ mass, data = pub_models, color = 'black',
          text = ~'Stahl') %>%
  # c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
  gf_theme(legend.position = c(0.9, 0.9),
           legend.title = element_blank()) %>%
  gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')

Figure 3.1: Observed and predicted heart rate as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967).

gf_point(log10hr ~ re_ind_fitted, data = mammal_hr,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fH)',
          x = 'Model-predicted, Species-specific log10(fH)',
          title = 'Mixed-effects Model (RE of Order/Genus)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

3.4 PGLS

3.4.1 Read in Tree Data

#Read in the trees from Upham et al

tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)

all_trees <- list()

for (i in 1:length(tree_files)){
  all_trees[[i]] <- read.tree(paste0(tree_path, '/',
                                     tree_files[i]))
  if (i == 1){
    treeset <- all_trees[[i]]
  }else{
    treeset <- c(treeset, all_trees[[i]])
  }
}

all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
                             function(x) 
                               stringr::str_replace_all(x, pattern = '_',
                                                      replacement = ' '))

# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
  if (t == 1){
    tip_labs <- all_tip_labels[[t]]
  }else{
    tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
  }
}

                            
# keep only the species in mammal_hr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_hr %>%
  filter(animal %in% tip_labs) %>%
  droplevels()


taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')

Fit models, one model for every tree in our list.

pgls_models <- list()

for (t in c(1:length(treeset))){
  # make sure there is only one row of data per species (seems dubious??)
  pgls_rep_data <- pgls_data %>% 
    group_by(animal) %>%
    sample_n(1) %>%
    ungroup
  
  
  #Reduce the tree to only include those species in the data set
  refit_tree <- treeset[[t]]
  refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
  refit_tree <- drop.tip(refit_tree, 
                         setdiff(refit_tree$tip.label, 
                                 levels(pgls_rep_data %>% pull(animal))))
  
  #Order the data set so that it is in the same order as the tip labels of the tree
  pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
                             pgls_rep_data,
                             by = c('tree.tip.label' = 'animal'),
                             keep = TRUE)
  
  # fit the model
  pgls_models[[t]] <- tryCatch({
    fittd <- gls(log10hr ~ log10.body.mass * habitat,
                 correlation = corPagel(value = 0.8, 
                                        phy = refit_tree, 
                                        fixed = FALSE, 
                                        form = ~animal),
                 data = pgls_rep_data)
  },
  error = function(cond){
    message(paste('PGLS fit failed for tree', t))
    return(NULL)
  }
  )
}

pgls_models <- pgls_models[!sapply(pgls_models, is.null)]

Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 20.

Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.

# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)  
# # pool summarise the models using Rubin's rule corrected for small samples
pooled_pgls   <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)

pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
Table continues below
term estimate std.error 2.5 % 97.5 %
(Intercept) 2.288 0.06053 2.167 2.409
log10.body.mass -0.1991 0.01934 -0.2376 -0.1606
habitatland 0.01418 0.05727 -0.09982 0.1282
log10.body.mass:habitatland -0.01487 0.02464 -0.06393 0.0342
lambda fmi
0.1624 0.1868
0.01856 0.04263
0.01526 0.03933
0.03128 0.05535
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
Analysis of Deviance Table (Type II tests) (continued below)
  F num df den df missing info
log10.body.mass 281.8 1 169615 0.02065
habitat 0.1756 1 23725 0.05527
log10.body.mass:habitat 0.3641 1 73941 0.03128
  riv Pr(>F)
log10.body.mass 0.02109 3.437e-63
habitat 0.0585 0.6752
log10.body.mass:habitat 0.03229 0.5463

Alternative approach: using MuMIn to do model averaging. This will not necessarily weight all of the models/trees equally?

pgls_avg <- model.avg(pgls_models, 
                      rank = function(x) 1)

This object is one that we can better make predictions from. The other way (with pool()) is better for getting the ANOVA results.

For the PGLS model, we also want to extract the estimate of \(\Lambda\), which tells us about how the phylogeny is affecting the correlation structure.

Previous approach was to take the mean of the estimates of \(\Lambda\) from all the individual fitted models.

lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.02275235

According to this simple method our estimate is \(\hat{\Lambda} =\) 0.023.

3.4.2 Model Assessment

It’s not really clear how to even approach this, since we don’t really any longer expect the residuals to be normal or independent. And based on the data we know there’s not a huge issue with linearity. So…ok?

4 Mammal Stroke Volume

4.1 Data

Predictors: body mass interacting with habitat.

4.1.1 Read in data

mammal_sv <- read.csv("data/sv.csv") 

4.1.2 Notes

  • log10sv is the base-10 logarithm of stroke volume in ml

4.1.3 Cleaning

Rename some variables and create species name from genus and species.

mammal_sv <- mammal_sv %>%
  rename(source = Source,
         order = order.corrected,
         mass.kg = body.mass..kg.) %>%
  mutate(order = str_to_title(order),
         genus = str_to_title(genus),
         animal = paste(genus, species),
         above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))

Keep only variables we will be using. And factor() “chr” variables.

mammal_sv <- mammal_sv %>%
  dplyr::select(order,
                genus,
                species, 
                common.name,
                mass.kg,
                log10.body.mass,
                log10sv,
                habitat,
                source,
                animal,
                above.10kg
         ) %>%
  mutate(across(where(is.character), factor)) %>%
  arrange(order, genus, species)

4.1.4 Viz

A few quick graphs just to make sure the data are looking as we expect (error checking).

my_scatter_plot <- gf_point(log10sv ~ log10.body.mass | habitat,
         color = ~order,
         # size = ~parse_number(n_animals),
         data = mammal_sv,
         alpha = 0.5) %>%
  gf_theme(legend.position = 'bottom',
           legend.title = element_text(size = 8),
           legend.text = element_text(size = 6)) %>%
  gf_theme(scale_color_viridis_d('Order')) %>%
  gf_labs(x = 'Log10(Mass (kg))',
          y = 'Log10(SV (ml))') 
my_scatter_plot

plotly::ggplotly(my_scatter_plot) %>%
  plotly::layout(legend = list(#orientation = 'h',
                               font = list(size = 6)))

Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.

4.2 GLS

Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat. Weight by number of individuals (if have data later, don’t have it now)?

lm_model <- lm(log10sv ~ log10.body.mass * habitat,
                 data = mammal_sv)
summary(lm_model)
## 
## Call:
## lm(formula = log10sv ~ log10.body.mass * habitat, data = mammal_sv)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38559 -0.09944 -0.03216  0.14260  0.36045 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  0.16485    0.20949   0.787    0.439    
## log10.body.mass              0.99959    0.09295  10.754 7.25e-11 ***
## habitatland                 -0.19610    0.21600  -0.908    0.373    
## log10.body.mass:habitatland  0.04781    0.09705   0.493    0.627    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1745 on 25 degrees of freedom
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9836 
## F-statistic: 559.1 on 3 and 25 DF,  p-value: < 2.2e-16
tab_model(lm_model) 
  log 10 sv
Predictors Estimates CI p
(Intercept) 0.16 -0.27 – 0.60 0.439
log10.body.mass 1.00 0.81 – 1.19 <0.001
habitat [land] -0.20 -0.64 – 0.25 0.373
log10.body.mass * habitat
[land]
0.05 -0.15 – 0.25 0.627
Observations 29
R2 / R2 adjusted 0.985 / 0.984
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
Anova Table (Type II tests)
  Sum Sq Df F value Pr(>F)
log10.body.mass 46.39 1 1524 6.308e-24
habitat 0.04624 1 1.519 0.2292
log10.body.mass:habitat 0.007388 1 0.2427 0.6265
Residuals 0.761 25 NA NA

4.2.1 Model Assessment

lm_preds <- predict(lm_model, se.fit = TRUE)

mammal_sv <- mammal_sv  %>% mutate(lm_resids = resid(lm_model),
         lm_fitted = lm_preds$fit,
         lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
         lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_sv)

acf(resid(lm_model))

gf_dhistogram(~lm_resids, data = mammal_sv,
              bins = 7) %>%
  gf_fitdistr()

4.2.2 Model Predictions

Any predictors not shown in a plot are held constant at their mean or most common value.

gf_line(lm_fitted ~ log10.body.mass,
         color = ~habitat,
         data = mammal_sv) %>%
  gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
            color = ~habitat, fill = ~habitat) %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors))

gf_point(log10sv ~ lm_fitted, data = mammal_sv,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(fH)',
          x = 'Model-predicted log10(fH)',
          title = 'Linear Regresssion (no phylogeny)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

Can refine the plot of predictions later on.

4.3 Mixed-effects model

Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.

re_model <- glmmTMB(log10sv ~ log10.body.mass * habitat +
                      (1 | order/genus/species),
                 data = mammal_sv)
summary(re_model)
##  Family: gaussian  ( identity )
## Formula:          
## log10sv ~ log10.body.mass * habitat + (1 | order/genus/species)
## Data: mammal_sv
## 
##      AIC      BIC   logLik deviance df.resid 
##     -9.5      1.5     12.7    -25.5       21 
## 
## Random effects:
## 
## Conditional model:
##  Groups                Name        Variance  Std.Dev. 
##  species:(genus:order) (Intercept) 1.740e-03 4.172e-02
##  genus:order           (Intercept) 2.360e-02 1.536e-01
##  order                 (Intercept) 8.935e-11 9.452e-06
##  Residual                          1.579e-03 3.973e-02
## Number of obs: 29, groups:  
## species:(genus:order), 29; genus:order, 27; order, 9
## 
## Dispersion estimate for gaussian family (sigma^2): 0.00158 
## 
## Conditional model:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.16485    0.19699   0.837    0.403    
## log10.body.mass              0.99959    0.08741  11.436   <2e-16 ***
## habitatland                 -0.17116    0.20442  -0.837    0.402    
## log10.body.mass:habitatland  0.03755    0.09173   0.409    0.682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model) 
  log 10 sv
Predictors Estimates CI p
(Intercept) 0.16 -0.22 – 0.55 0.403
log10.body.mass 1.00 0.83 – 1.17 <0.001
habitat [land] -0.17 -0.57 – 0.23 0.402
log10.body.mass * habitat
[land]
0.04 -0.14 – 0.22 0.682
Random Effects
σ2 0.00
τ00 species:(genus:order) 0.00
τ00 genus:order 0.02
τ00 order 0.00
N species 28
N genus 27
N order 9
Observations 29
Marginal R2 / Conditional R2 0.999 / NA
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
Analysis of Deviance Table (Type II Wald chisquare tests)
  Chisq Df Pr(>Chisq)
log10.body.mass 1520 1 0
habitat 1.576 1 0.2093
log10.body.mass:habitat 0.1676 1 0.6823

4.3.1 Model Assessment

re_ave_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = ~0)
re_ind_preds <- predict(re_model,
                        se.fit = TRUE,
                        re.form = NULL)
mammal_sv <- mammal_sv %>%
  mutate(re_resids = resid(re_model),
         re_ind_fitted = re_ind_preds$fit,
         re_ave_fitted = re_ave_preds$fit,
         re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
         re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
gf_point(re_resids ~ re_ind_fitted, data = mammal_sv)

# save fitted model and data
saveRDS(mammal_sv, 'fitted-models/sv-data.RDS')
saveRDS(re_model, 'fitted-models/sv-re-model.RDS')


acf(resid(re_model))
acf(resid(re_model))

gf_dhistogram(~re_resids, data = mammal_sv,
              bins = 7) %>%
  gf_fitdistr()

4.3.2 Predictions from Model

mammal_sv <- mammal_sv %>%
  mutate(Habitat = stringr::str_to_sentence(habitat))

re_preds <- gf_point(10^log10sv ~ 10^log10.body.mass,
         color = ~Habitat,
        text = ~animal,
         data = mammal_sv) %>% 
  gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
         color = ~Habitat,
         data = mammal_sv,
         text = ~Habitat) %>%
  gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
            color = ~Habitat, fill = ~Habitat, 
            text = ~Habitat) %>% 
  gf_labs(x = 'Body Mass (kg)', y = 'Stroke Volume (mL/beat)') %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors)) %>%
  gf_refine(scale_x_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE)),
            scale_y_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE))
  )

# For stroke volume (Stahl 1967, using prediction equations from Stahl for cardiac output (ml/min) and heat rate(beats/min)): SV (ml/beat) = [187 * body mass(kg)^0.81]/[241 * body mass (kg) ^-0.25]

pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>% # data.frame(mass = seq(from = 0.029, by = 0.1, to = 5000)) %>%
  mutate(sv_stahl = (187*mass^0.81) / (241 * mass^-0.25))

re_preds <- re_preds %>%
  gf_line(sv_stahl ~ mass, data = pub_models, color = 'black',
          text = ~'Stahl') %>%
  # c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
  gf_theme(legend.position = c(0.9, 0.1),
           legend.title = element_blank()) %>%
  gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')

Figure 4.1: Observed and predicted stroke volume as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967).

gf_point(log10sv ~ re_ind_fitted, data = mammal_sv,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(sv)',
          x = 'Model-predicted, Species-specific log10(sv)',
          title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

4.4 PGLS

#Read in the trees from Upham et al

tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)


all_trees <- list()

for (i in 1:length(tree_files)){
  all_trees[[i]] <- read.tree(paste0(tree_path, '/',
                                     tree_files[i]))
  if (i == 1){
    treeset <- all_trees[[i]]
  }else{
    treeset <- c(treeset, all_trees[[i]])
  }
}

all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
                             function(x) 
                               stringr::str_replace_all(x, pattern = '_',
                                                      replacement = ' '))

# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
  if (t == 1){
    tip_labs <- all_tip_labels[[t]]
  }else{
    tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
  }
}

                            
# keep only the species in mammal_sv that are in all the trees
# on 6/17 this removes NO species.
pgls_data <- mammal_sv %>%
  mutate(animal = as.character(animal)) %>%
  filter(animal %in% tip_labs) %>%
  droplevels()


taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')

Fit models, one model for every tree in our list.

pgls_models <- list()

for (t in c(1:length(treeset))){
  # make sure there is only one row of data per species
  pgls_rep_data <- pgls_data %>% 
    group_by(animal) %>%
    sample_n(1) %>%
    ungroup
  
  
  #Reduce the tree to only include those species in the data set
  refit_tree <- treeset[[t]]
  refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
  refit_tree <- drop.tip(refit_tree, 
                         setdiff(refit_tree$tip.label, 
                                 unique(pgls_rep_data %>% pull(animal))))
  
  #Order the data set so that it is in the same order as the tip labels of the tree
  pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
                             pgls_rep_data,
                             by = c('tree.tip.label' = 'animal'),
                             keep = TRUE)
  
  # fit the model
 pgls_models[[t]] <- tryCatch({
    fittd <- gls(log10sv ~ log10.body.mass * habitat,
                 correlation = corPagel(value = 0.8, 
                                        phy = refit_tree, 
                                        fixed = FALSE, 
                                        form = ~animal),
                 data = pgls_rep_data)
  },
  error = function(cond){
    message(paste('PGLS fit failed for tree', t))
    return(NULL)
  }
  )
}

pgls_models <- pgls_models[!sapply(pgls_models, is.null)]

Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 4.

Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.

# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)  
# # pool summarise the models using Rubin's rule corrected for small samples
pooled_pgls   <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)

pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
Table continues below
term estimate std.error 2.5 % 97.5 %
(Intercept) 0.2041 0.21 -0.2354 0.6436
log10.body.mass 0.9929 0.09319 0.7988 1.187
habitatland -0.2378 0.2206 -0.6989 0.2234
log10.body.mass:habitatland 0.05945 0.09692 -0.1422 0.2611
lambda fmi
0.1758 0.2507
0.1154 0.1908
0.1604 0.2354
0.09824 0.1737
# this depends on inner workings of package car found in gls-utils.R
# it used to work without all this, in summer 2021 stopped
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
Analysis of Deviance Table (Type II tests) (continued below)
  F num df den df missing info riv
log10.body.mass 1419 1 6153 0.1201 0.1365
habitat 1.953 1 2290 0.1972 0.2457
log10.body.mass:habitat 0.3762 1 9181 0.09824 0.1089
  Pr(>F)
log10.body.mass 1.219e-279
habitat 0.1624
log10.body.mass:habitat 0.5397

Alternative approach: using MuMIn to do model averaging. Was it right to weight all of the models/trees equally?

pgls_avg <- model.avg(pgls_models, 
                      rank = function(x) 1)

This gives a model with the same coefficients (same model) that we can better make predict()ions from.

For the PGLS model, we also want to extract the estimate of \(\Lambda\), which tells us about how the phylogeny is affecting the correlation structure.

Simple approach: take the mean of the estimates of \(\Lambda\) from all the individual fitted models.

lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] -0.110112

4.4.1 Model Assessment

It’s not really clear how to even approach this, since we don’t really any longer expect the residuals to be normal or independent. And based on the data we know there’s not a huge issue with linearity. So…ok?

5 Mammal Tidal Volume

5.1 Data

Predictors: body mass interacting with habitat.

5.1.1 Read in data

mammal_vt <- read.csv("data/vt.csv") 

5.1.2 Notes

  • log10vt is the base-10 logarithm of tidal volume in ml

5.1.3 Cleaning

Rename some variables and create species name from genus and species.

mammal_vt <- mammal_vt %>%
  rename(source = Source,
         order = order.corrected,
         mass.kg = body.mass..kg.) %>%
  mutate(order = str_to_title(order),
         genus = str_to_title(genus),
         animal = paste(genus, species),
         above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))

Keep only variables we will be using. And factor() “chr” variables.

mammal_vt <- mammal_vt %>%
  dplyr::select(order,
                genus,
                species, 
                common.name,
                mass.kg,
                log10.body.mass,
                log10vt,
                habitat,
                source,
                animal,
                above.10kg
         ) %>%
  mutate(across(where(is.character), factor)) %>%
  arrange(order, genus, species)

5.1.4 Viz

A few quick graphs just to make sure the data are looking as we expect (error checking).

my_scatter_plot <- gf_point(log10vt ~ log10.body.mass | habitat,
         color = ~order,
         # size = ~parse_number(n_animals),
         data = mammal_vt,
         alpha = 0.5) %>%
  gf_theme(legend.position = 'bottom',
           legend.title = element_text(size = 8),
           legend.text = element_text(size = 6)) %>%
  gf_theme(scale_color_viridis_d('Order')) %>%
  gf_labs(x = 'Log10(Mass (kg))',
          y = 'Log10(Vt (liter))') 
my_scatter_plot

plotly::ggplotly(my_scatter_plot) %>%
  plotly::layout(legend = list(#orientation = 'h',
                               font = list(size = 6)))

5.2 GLS

Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat.

lm_model <- lm(log10vt ~ log10.body.mass * habitat,
                 data = mammal_vt)
tab_model(lm_model) 
  log 10 vt
Predictors Estimates CI p
(Intercept) 1.60 1.22 – 1.98 <0.001
log10.body.mass 0.94 0.80 – 1.08 <0.001
habitat [land] -0.63 -1.02 – -0.25 0.002
log10.body.mass * habitat
[land]
0.07 -0.07 – 0.22 0.311
Observations 50
R2 / R2 adjusted 0.990 / 0.990
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
Anova Table (Type II tests)
  Sum Sq Df F value Pr(>F)
log10.body.mass 76.68 1 2682 1.949e-42
habitat 1.376 1 48.12 1.134e-08
log10.body.mass:habitat 0.03001 1 1.05 0.3109
Residuals 1.315 46 NA NA

5.2.1 Model Assessment

lm_preds <- predict(lm_model, se.fit = TRUE)

mammal_vt <- mammal_vt  %>% mutate(lm_resids = resid(lm_model),
         lm_fitted = lm_preds$fit,
         lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
         lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_vt)

s245::gf_acf(~lm_model)

gf_dhistogram(~lm_resids, data = mammal_vt,
              bins = 20) %>%
  gf_fitdistr()

5.2.2 Model Predictions

Any predictors not shown in a plot are held constant at their mean or most common value.

gf_line(lm_fitted ~ log10.body.mass,
         color = ~habitat,
         data = mammal_vt) %>%
  gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
            color = ~habitat, fill = ~habitat) %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors))

gf_point(log10vt ~ lm_fitted, data = mammal_vt,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(Vt)',
          x = 'Model-predicted log10(Vt)',
          title = 'Linear Regresssion (no phylogeny)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

5.3 Mixed-effects model

Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are required to be “further” apart than any other two.

re_model <- glmmTMB(log10vt ~ log10.body.mass * habitat + (1 | order/genus/species),
                 data = mammal_vt)

summary(re_model)
##  Family: gaussian  ( identity )
## Formula:          
## log10vt ~ log10.body.mass * habitat + (1 | order/genus/species)
## Data: mammal_vt
## 
##      AIC      BIC   logLik deviance df.resid 
##    -27.9    -12.6     22.0    -43.9       42 
## 
## Random effects:
## 
## Conditional model:
##  Groups                Name        Variance  Std.Dev.
##  species:(genus:order) (Intercept) 0.0108807 0.10431 
##  genus:order           (Intercept) 0.0069420 0.08332 
##  order                 (Intercept) 0.0004543 0.02132 
##  Residual                          0.0073999 0.08602 
## Number of obs: 50, groups:  
## species:(genus:order), 47; genus:order, 42; order, 9
## 
## Dispersion estimate for gaussian family (sigma^2): 0.0074 
## 
## Conditional model:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  1.61392    0.19456   8.295  < 2e-16 ***
## log10.body.mass              0.93222    0.07429  12.548  < 2e-16 ***
## habitatland                 -0.65494    0.19922  -3.288  0.00101 ** 
## log10.body.mass:habitatland  0.07660    0.07244   1.057  0.29036    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model) 
  log 10 vt
Predictors Estimates CI p
(Intercept) 1.61 1.23 – 2.00 <0.001
log10.body.mass 0.93 0.79 – 1.08 <0.001
habitat [land] -0.65 -1.05 – -0.26 0.001
log10.body.mass * habitat
[land]
0.08 -0.07 – 0.22 0.290
Random Effects
σ2 0.01
τ00 species:(genus:order) 0.01
τ00 genus:order 0.01
τ00 order 0.00
ICC 0.71
N species 47
N genus 42
N order 9
Observations 50
Marginal R2 / Conditional R2 0.991 / 0.997
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
Analysis of Deviance Table (Type II Wald chisquare tests)
  Chisq Df Pr(>Chisq)
log10.body.mass 1594 1 0
habitat 40.93 1 1.58e-10
log10.body.mass:habitat 1.118 1 0.2904

5.3.1 Model Assessment

re_ave_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = ~0)
re_ind_preds <- predict(re_model,
                        se.fit = TRUE,
                        re.form = NULL)
mammal_vt <- mammal_vt %>%
  mutate(re_resids = resid(re_model),
         re_ind_fitted = re_ind_preds$fit,
         re_ave_fitted = re_ave_preds$fit,
         re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
         re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)

# save fitted model and data
saveRDS(mammal_vt, 'fitted-models/vt-data.RDS')
saveRDS(re_model, 'fitted-models/vt-re-model.RDS')


gf_point(re_resids ~ re_ind_fitted, data = mammal_vt)

acf(resid(re_model))

#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_vt) %>%
  gf_fitdistr()

5.3.2 Predictions from Model

mammal_vt <- mammal_vt %>%
  mutate(Habitat = stringr::str_to_sentence(habitat))

re_preds <- gf_point(10^log10vt ~ 10^log10.body.mass,
         color = ~Habitat,
        text = ~animal,
         data = mammal_vt) %>% 
  gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
         color = ~Habitat,
         data = mammal_vt,
         text = ~Habitat) %>%
  gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
            color = ~Habitat, fill = ~Habitat, 
            text = ~Habitat) %>% 
  gf_labs(x = 'Body Mass (kg)', y = 'Tidal Volume (mL)') %>% 
  gf_theme(scale_color_manual(values = my_colors)) %>%
  gf_theme(scale_fill_manual(values = my_colors)) %>%
  gf_refine(scale_x_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE)),
            scale_y_continuous(trans = 'log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE)))

# For tidal volume, terrestrial (Stahl 1967): SV (ml) = 7.69 * body mass^1.04 
# For tidal volume, marine (Fahlman 2020, sea lion/cetacean papers): SV (l) = 0.0372 * body mass^0.97 

pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>% # data.frame(mass = seq(from = 0.008, by = 0.1, to = 5200)) %>%
  mutate(vt_stahl = 7.69 * mass^1.04,
         vt_fahlman = 37.2 * mass^0.97)

re_preds <- re_preds %>%
  gf_line(vt_stahl ~ mass, data = pub_models, color = 'black',
          text = ~'Stahl') %>%
  gf_line(vt_fahlman ~ mass, data = pub_models, color = 'black',
          linetype = 'dashed', text = ~'Fahlman') %>%
  # c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
  gf_theme(legend.position = c(0.9, 0.1),
           legend.title = element_blank()) %>%
  gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')

Figure 5.1: Observed and predicted tidal volume as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967) and black dashed line on Fahlman (2020).

gf_point(log10vt ~ re_ind_fitted, data = mammal_vt,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(Vt)',
          x = 'Model-predicted, Species-specific log10(VT)',
          title = 'Mixed-effects Model (RE of Order/Genus)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

The graph above is a bit “cheating” as we have a random effect of species, but there are only 1-2 measurements for most of the species (nearly guaranteeing that our estimates will be nearly perfect). What if we also check out the predictions accounting for the modeled effects of Order and Genus, but predicting for the “average” species in each Genus?

no_species <- mammal_vt %>%
  mutate(species = NA)
re_genus_level_preds <- predict(re_model, 
                    se.fit = TRUE,
                    re.form = NULL,
                    newdata = no_species)

gf_point(log10vt ~ re_genus_level_preds$fit, data = mammal_vt,
         alpha = 0.1) %>%
  gf_labs(y = 'Observed log10(Vt)',
          x = 'Model-predicted, Genus-specific log10(Vt)',
          title = 'Mixed-effects Model (RE of Order/Genus)') %>%
  gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')

5.4 PGLS

5.4.1 Read in Tree Data

#Read in the trees from Upham et al

tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)

all_trees <- list()

for (i in 1:length(tree_files)){
  all_trees[[i]] <- read.tree(paste0(tree_path, '/',
                                     tree_files[i]))
  if (i == 1){
    treeset <- all_trees[[i]]
  }else{
    treeset <- c(treeset, all_trees[[i]])
  }
}

all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
                             function(x) 
                               stringr::str_replace_all(x, pattern = '_',
                                                      replacement = ' '))

# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
  if (t == 1){
    tip_labs <- all_tip_labels[[t]]
  }else{
    tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
  }
}

                            
# keep only the species in mammal_bmr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_vt %>%
  filter(animal %in% tip_labs) %>%
  droplevels()


taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')

Fit models, one model for every tree in our list.

pgls_models <- list()

for (t in c(1:length(treeset))){
  # make sure there is only one row of data per species (seems dubious??)
  pgls_rep_data <- pgls_data %>% 
    group_by(animal) %>%
    sample_n(1) %>%
    ungroup
  
  
  #Reduce the tree to only include those species in the data set
  refit_tree <- treeset[[t]]
  refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
  refit_tree <- drop.tip(refit_tree, 
                         setdiff(refit_tree$tip.label, 
                                 levels(pgls_rep_data %>% pull(animal))))
  
  #Order the data set so that it is in the same order as the tip labels of the tree
  pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
                             pgls_rep_data,
                             by = c('tree.tip.label' = 'animal'),
                             keep = TRUE)
  
  # fit the model
 pgls_models[[t]] <- tryCatch({
    fittd <- gls(log10vt ~ log10.body.mass * habitat,
                   correlation = corPagel(value = 0.8, 
                                          phy = refit_tree, 
                                          fixed = FALSE, 
                                          form = ~animal),
                   data = pgls_rep_data)
    },
  error = function(cond){
    message(paste('PGLS fit failed for tree', t))
    return(NULL)
  }
  )
}
pgls_models <- pgls_models[!sapply(pgls_models, is.null)]

Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 3.

Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.

# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)  
# # pool summarise the models using Rubin's rule corrected for small samples
pooled_pgls   <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)

pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
Table continues below
term estimate std.error 2.5 % 97.5 %
(Intercept) 1.638 0.2012 1.231 2.046
log10.body.mass 0.9386 0.07018 0.7966 1.081
habitatland -0.6498 0.1953 -1.045 -0.2543
log10.body.mass:habitatland 0.08718 0.07309 -0.06071 0.2351
lambda fmi
0.0603 0.1065
0.03041 0.07669
0.0659 0.1121
0.03916 0.08542
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
Analysis of Deviance Table (Type II tests) (continued below)
  F num df den df missing info riv
log10.body.mass 2669 1 113235 0.02808 0.0289
habitat 41.01 1 32534 0.05242 0.05532
log10.body.mass:habitat 1.423 1 58266 0.03916 0.04076
  Pr(>F)
log10.body.mass 0
habitat 1.538e-10
log10.body.mass:habitat 0.233

Alternative approach: using MuMIn to do model averaging. This will not necessarily weight all of the models/trees equally?

pgls_avg <- model.avg(pgls_models, 
                      rank = function(x) 1)

This gives a model that we can more easily make predictions from. The other way (with pool()) is better for getting the ANOVA results.

For the PGLS model, we also want to extract the estimate of \(\Lambda\), which tells us about how the phylogeny is affecting the correlation structure.

Previous approach was to take the mean of the estimates of \(\Lambda\) from all the individual fitted models.

lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.08023527

According to this simple method our estimate is \(\hat{\Lambda} =\) 0.08.

5.4.2 Model Assessment

It’s not really clear how to even approach this, since we don’t really any longer expect the residuals to be normal or independent. And based on the data we know there’s not a huge issue with linearity. So…ok?

6 Additional Predictions (Derived Quantities)

6.1 Cardiac Output

Cardiac output is the product of heart rate (hr) and stroke volume (sv).

We will use a parametric bootstrap to make predictions of cardiac output as a function of log10(body mass) and habitat based on our random-effect models for hr and sv.

n_boot <- 1000

We will make sets of many (1000) predictions for each of the species that occur in both the hr and sv data sets.

# note NAMES HAVE TO BE SAME as passed to glmmTMB() when models were fitted (geesh)
mammal_hr <- readRDS('fitted-models/hr-data.RDS')
hr_mod <- readRDS('fitted-models/hr-re-model.RDS')
mammal_sv <- readRDS('fitted-models/sv-data.RDS')
sv_mod <- readRDS('fitted-models/sv-re-model.RDS')

cardiac_species <- intersect(pull(mammal_hr, animal), pull(mammal_sv, animal))

The two datasets have 24 species in common.

Make dataset for which to make predictions, containing these species.

cardiac_hr <- mammal_hr %>%
  dplyr::filter(animal %in% cardiac_species) %>%
  dplyr::select(animal, order, genus, species, log10.body.mass, log10hr, habitat) %>%
  mutate(across(where(is.factor), factor))

cardiac_sv <- mammal_sv %>%
  dplyr::filter(animal %in% cardiac_species) %>%
  dplyr::select(animal, order, genus, species, log10.body.mass, log10sv, habitat) 

For each row of the dataset, make a set of 1000 model predictions of cardiac output. These predictions will include uncertainty in the model parameters (intercept and slopes) as well as variation attributed to order/genus/species. (Residual variation is not included). The mass estimates are different in the different datasets; here we use the mass estimate from the dataset to which the model (hr or sv) was originally fitted.

First we define custom functions to make the predictions

predict_hr <- function(.){
  predict(.,
          newdata = cardiac_hr,
          re.form = NULL,
          type = 'response')
}

predict_sv <- function(.){
  predict(.,
          newdata = cardiac_sv,
          re.form = NULL,
          type = 'response')
}

Do the bootstrap(s)

if (!file.exists('fitted-models/hr_boot.RDS')){
hr_boot <- bootMer(hr_mod, 
                   FUN = predict_hr,
                   nsim = n_boot,
                   re.form = NULL,
                   type = 'parametric'
                   )

saveRDS(hr_boot, 'fitted-models/hr_boot.RDS')
}else{
  hr_boot <- readRDS('fitted-models/hr_boot.RDS')
}
if (!file.exists('fitted-models/sv_boot.RDS')){
sv_boot <- bootMer(sv_mod, 
                   FUN = predict_sv,
                   nsim = n_boot,
                   re.form = NULL,
                   type = 'parametric')
saveRDS(sv_boot, 'fitted-models/sv_boot.RDS')
}else{
  sv_boot <- readRDS('fitted-models/sv_boot.RDS')
}

Add boot results to datasets, combine to one dataset, and compute predicted cardiac output (and its median and 95% bootstrap CI):

cardiac_hr <- cardiac_hr %>%
  mutate(boot_pred_hr = unlist(apply(t(hr_boot$t), 1, list), recursive = FALSE))

cardiac_sv <- cardiac_sv %>%
  mutate(boot_pred_sv = unlist(apply(t(sv_boot$t), 1, list), recursive = FALSE))
  
cardiac_preds <- inner_join(cardiac_hr, cardiac_sv,
                         by = c('animal', 'order', 'genus', 'species', 'habitat')) %>%
  mutate(boot_pred_cardiac = map2(boot_pred_hr, boot_pred_sv, function(.x, .y) mapply(prod, 10^.x, 10^.y))) %>%
  mutate(boot_median_cardiac = map_dbl(boot_pred_cardiac, median, na.rm = TRUE),
         boot_lo = map_dbl(boot_pred_cardiac, quantile, probs = 0.025, na.rm = TRUE),
         boot_hi = map_dbl(boot_pred_cardiac, quantile, probs = 0.975, na.rm = TRUE),
         mean_mass = ((10^log10.body.mass.x + 10^log10.body.mass.y) / 2) )
# units: ml/stroke * beats/min

plot results

cardiac_mass <- gf_point(boot_median_cardiac ~ mean_mass, data = cardiac_preds,
         color = ~habitat,
         text = ~animal) %>%
  gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = cardiac_preds,
              color = ~habitat) %>%
  gf_labs(title = 'Expected Cardiac Output\nfor the species modeled',
          y = 'Predicted Cardiac Output (mL/min)',
          x = 'Body Mass (kg)') %>%
  gf_theme(scale_color_manual(values = my_colors)) 

ggplotly(cardiac_mass, tooltip = 'text') 
cardiac_mass
cardiac_mass_log <- gf_point(boot_median_cardiac ~ mean_mass, data = cardiac_preds,
         color = ~habitat,
         text = ~animal) %>%
  gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = cardiac_preds,
              color = ~habitat) %>%
  gf_lims(x = c(0.001, 10000)) %>%
  gf_refine(scale_x_continuous(trans='log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE))) %>%
  gf_refine(scale_y_continuous(trans='log10',
                               labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE))) %>%
  gf_labs(title = 'Expected Cardiac Output\nfor the species modeled',
          y = 'Predicted Cardiac Output (mL/min)',
          x = 'Body Mass (kg)') %>%
  gf_theme(scale_color_manual(values = my_colors)) 
  

ggplotly(cardiac_mass_log, tooltip = 'text')
cardiac_mass_log

6.2 Total Ventilation

Total ventilation is the product of breathing rate (fr) and tidal volume (vt).

We will use a parametric bootstrap to make predictions of total ventilation as a function of log10(body mass) and habitat based on our random-effect models for fr and vt.

We will make sets of many (1000) predictions for each of the species that occur in both the fr and vt data sets.

# note NAMES HAVE TO BE SAME as passed to glmmTMB() when models were fitted (geesh)
mammal_fr <- readRDS('fitted-models/fr-data.RDS')
fr_mod <- readRDS('fitted-models/fr-re-model.RDS')
mammal_vt <- readRDS('fitted-models/vt-data.RDS')
vt_mod <- readRDS('fitted-models/vt-re-model.RDS')

vent_species <- intersect(pull(mammal_fr, animal), pull(mammal_vt, animal))

The two datasets have 31 species in common.

Make datasets for which to make predictions, containing these species.

vent_fr <- mammal_fr %>%
  dplyr::filter(animal %in% vent_species) %>%
  dplyr::select(animal, order, genus, species, log10.body.mass, log10fr, habitat) %>%
  mutate(across(where(is.factor), factor))

vent_vt <- mammal_vt %>%
  dplyr::filter(animal %in% vent_species) %>%
  dplyr::select(animal, order, genus, species, log10.body.mass, log10vt, habitat) 

For each row of the dataset, make a set of 1000 model predictions of ventilation. These predictions will include uncertainty in the model parameters (intercept and slopes) as well as variation attributed to order/genus/species. (Residual variation is not included). The mass estimates are different in the different datasets; here we use the mass estimate from the dataset to which the model (fr or vt) was originally fitted.

First we define custom functions to make the predictions

predict_fr <- function(.){
  predict(.,
          newdata = vent_fr,
          re.form = NULL,
          type = 'response')
}

predict_vt <- function(.){
  predict(.,
          newdata = vent_vt,
          re.form = NULL,
          type = 'response')
}

Do the bootstrap(s)

if (!file.exists('fitted-models/fr_boot.RDS')){
fr_boot <- bootMer(fr_mod, 
                   FUN = predict_fr,
                   nsim = n_boot,
                   re.form = NULL,
                   type = 'parametric'
                   )

saveRDS(fr_boot, 'fitted-models/fr_boot.RDS')
}else{
  fr_boot <- readRDS('fitted-models/fr_boot.RDS')
}
if (!file.exists('fitted-models/vt_boot.RDS')){
vt_boot <- bootMer(vt_mod, 
                   FUN = predict_vt,
                   nsim = n_boot,
                   re.form = NULL,
                   type = 'parametric')
saveRDS(vt_boot, 'fitted-models/vt_boot.RDS')
}else{
  vt_boot <- readRDS('fitted-models/vt_boot.RDS')
}

Add boot results to datasets, combine to one dataset, and compute predicted ventilation (and its median and 95% bootstrap CI):

vent_fr <- vent_fr %>%
  mutate(boot_pred_fr = unlist(apply(t(fr_boot$t), 1, list), recursive = FALSE))

vent_vt <- vent_vt %>%
  mutate(boot_pred_vt = unlist(apply(t(vt_boot$t), 1, list), recursive = FALSE))
  
vent_preds <- inner_join(vent_fr, vent_vt,
                         by = c('animal', 'order', 'genus', 'species', 'habitat')) %>%
  mutate(boot_pred_vent = map2(boot_pred_fr, boot_pred_vt, function(.x, .y) mapply(prod, 10^.x, 10^.y))) %>%
  mutate(boot_median_vent = map_dbl(boot_pred_vent, median, na.rm = TRUE),
         boot_lo = map_dbl(boot_pred_vent, quantile, probs = 0.025, na.rm = TRUE),
         boot_hi = map_dbl(boot_pred_vent, quantile, probs = 0.975, na.rm = TRUE),
         mean_mass = ((10^log10.body.mass.x + 10^log10.body.mass.y) / 2) )
# units: breaths/minute (?) * mL / breath ---> mL/min

plot results

vent_mass <- gf_point(boot_median_vent ~ mean_mass, data = vent_preds,
         color = ~habitat,
         text = ~animal) %>%
  gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = vent_preds,
              color = ~habitat) %>%
  gf_labs(title = 'Expected Ventilation\nfor the species modeled',
          y = 'Predicted Ventilation (mL/min)',
          x = 'Body Mass (kg)') %>%
  gf_theme(scale_color_manual(values = my_colors)) 

ggplotly(vent_mass, tooltip = 'text')
vent_mass
vent_mass_log <- gf_point(boot_median_vent ~ mean_mass, data = vent_preds,
         color = ~habitat,
         text = ~animal) %>%
  gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = vent_preds,
              color = ~habitat)%>%
  gf_lims(x = c(0.001, 10000)) %>%
  gf_refine(scale_x_log10(#trans = 'log10',
                              labels = scales::label_comma(accuracy = 0.001,
                                                           drop0trailing = TRUE)),
            scale_y_log10(labels = scales::label_comma(accuracy = 0.001,
                                                            drop0trailing = TRUE))) %>%
  gf_labs(title = 'Expected Ventilation\nfor the species modeled',
          y = 'Predicted Ventilation (mL/min)',
          x = 'Body Mass (kg)') %>%
  gf_theme(scale_color_manual(values = my_colors)) 

ggplotly(vent_mass_log, tooltip = 'text')
vent_mass_log